'''
This script submits each user to Botometer, retrieves that user's rating, and attaches the rating to each of the user's tweets so that the account can then be judged bot or not.

It comes after 02_ProtestImageAnalysis_ProcessRawData_v4.Rmd and before 03_AggregateRawTweets_ProtestImageAnalysis_v1.py.

Designed to work on Python3.5.
'''

##########################
##
##	PACKAGES, OPTIONS
##
##########################
import botometer
import os
import pandas as pd
import json
import time

# Work from the replication root; the data paths used by this script are
# written relative to it.
replication_root = '<path/to/Replication/>'
os.chdir(replication_root)

# Raise pandas' console display limits so wide/long frames print in full.
for option_name, option_value in (('display.max_columns', 500),
                                  ('display.max_rows', 1000)):
    pd.set_option(option_name, option_value)

##########################
##
##	DATA
##
##########################
# Tweet-level input data (one row per tweet).
tweets_csv = './Data/02_processedData/c_DonghyeonAlexmerged_classifiers_shortSpain.csv'
data = pd.read_csv(tweets_csv)

# Unique user IDs, as ints, to submit to Botometer.  Rows with a missing
# user_id are dropped first (only one tweet has nan for user_id; unclear why
# there is both user.id and user_id).
# NOTE(review): because of that missing value pandas presumably parses user_id
# as float64; IDs above 2**53 would not survive int() exactly -- confirm
# against the CSV, and consider reading the column as a string if so.
users = [int(user_id) for user_id in set(data['user_id'].dropna())]

# API credentials.  The values below are placeholders: substitute your own
# before running, and keep real keys out of version control.
mashape_key = "input_your_key_here"
twitter_app_auth = dict(
    consumer_key='use_your_own',
    consumer_secret='use_your_own',
    access_token='use_your_own',
    access_token_secret='use_your_own',
)

# Botometer client used for every lookup below.
# NOTE(review): wait_on_ratelimit presumably makes the client pause when the
# Twitter rate limit is hit -- confirm against the installed botometer version.
botometer_options = dict(twitter_app_auth,
                         wait_on_ratelimit=True,
                         mashape_key=mashape_key)
bom = botometer.Botometer(**botometer_options)


##########################
##
##	BOTOMETER RESULTS
##
##########################
# 180 requests per 15 minutes.  10361 users.  Should take about 15 hours.
#
# The client's own rate-limit handling still tripped the limit, so requests
# are throttled here as well.  The run is resumable: results already saved on
# disk are loaded first and those users are skipped, and whatever has been
# collected is written back out even if the loop is interrupted.  (Previously
# a restart re-initialised `results` and resumed from a hard-coded index, so
# the saved file only held the users queried after the restart.)
allowed = 180
minutes = 15
toSleep = (minutes * 60) / allowed  # Seconds per request that stays within the limit

results_path = './Data/02_processedData/z_botometerResults.txt'

# Pick up where a previous (partial) run stopped.  Keys in the saved file are
# user IDs as strings, because JSON object keys are always strings.
if os.path.exists(results_path):
    with open(results_path, 'r') as f:
        results = json.load(f)
else:
    results = {}

# Only query users that do not have a saved result yet.
users_cut = [user for user in users if str(user) not in results]

i = len(users) - len(users_cut)  # Number of users already done; printed as progress
try:
    for screen_name, result in bom.check_accounts_in(users_cut):
        # Store under the string form so loaded and fresh entries share one key type.
        results[str(screen_name)] = result
        print(i)
        i += 1
        time.sleep(toSleep - 1.5)  # -1.5 as guestimate of time it takes to download
finally:
    ### Save (also on interruption, so completed lookups are never lost).
    # Write to a temporary file and swap it in, so a failed write cannot
    # truncate results saved by an earlier run.
    tmp_path = results_path + '.tmp'
    with open(tmp_path, 'w') as f:
        json.dump(results, f)
    os.replace(tmp_path, results_path)





##########################
##
##	MERGE
##
##########################
# Read in the saved Botometer results (looked up below by user ID as a string).
with open('Data/02_processedData/z_botometerResults.txt', 'r') as f:
    saved_results = json.load(f)


def _bot_cap_universal(user_id, lookup):
    """Return the value to store in 'bot_cap_universal' for one tweet.

    user_id: the tweet's user_id value (may be missing/NaN).
    lookup:  dict of saved Botometer results keyed by user ID as a string.

    Returns 'nan' when the user_id is missing, the entry's ['cap']['universal']
    value when the saved result has a 'cap' section, and
    'User_no_longer_available' when it does not.

    Raises KeyError when no result at all was saved for the user.
    """
    if pd.isnull(user_id):
        return 'nan'

    key = str(int(user_id))
    try:
        entry = lookup[key]
    except KeyError:
        # Same exception type as a bare lookup, but say which user is missing.
        raise KeyError('No Botometer result saved for user_id ' + key
                       + '; rerun the Botometer step for this user.') from None

    # Any saved entry without a 'cap' section is labelled as unavailable.
    if 'cap' in entry:
        return entry['cap']['universal']
    return 'User_no_longer_available'


# One value per tweet, in the same row order as data.
temp = [_bot_cap_universal(item, saved_results) for item in data['user_id']]

data['bot_cap_universal'] = temp



##########################
##
##	SAVE OUT
##
##########################
# Write the tweet-level data, including the bot_cap_universal column, without
# the pandas row index.
merged_csv = './Data/02_processedData/c2_DonghyeonAlexmerged_classifiers_shortSpain.csv'
data.to_csv(merged_csv, index=False)


